home *** CD-ROM | disk | FTP | other *** search
- Path: news.ucdavis.edu!quad!knight
- From: knight@quad.cs.ucdavis.edu (James Knight)
- Newsgroups: comp.lang.c,comp.unix.programmer
- Subject: Re: Q: '\n' character - Making a better fgets?
- Followup-To: comp.lang.c,comp.unix.programmer
- Date: 18 Apr 1996 22:28:10 GMT
- Organization: University of California, Davis
- Message-ID: <4l6flq$rck@mark.ucdavis.edu>
- References: <31616F63.481D@lava.weeg.uiowa.edu> <4jtddt$eu7@masala.cc.uh.edu> <DpBuF6.83C@ukpsshp1.serigate.philips.nl> <3169994D.665ACF69@cs.ucl.ac.uk>
- NNTP-Posting-Host: quad.cs.ucdavis.edu
- X-Newsreader: TIN [version 1.2 PL2]
-
-
- The experiment I mentioned on varying fgets/fputs and fgetc/fputc was
- run using a simple implementation of the reverse program (i.e.,
- reverse the characters of each line). I don't have the original
- experiment results except on paper, but I've recreated the experiment.
-
- For each variation of the program, I ran it using two inputs, a 4.5MB
- file containing all short lines (i.e., 80 chars or less) and a 1MB
- file consisting of a single line. The variations were:
-
- 1) using fgetc/fputc to do I/O
- 2) using getc/putc to do I/O
- 3) using fgets/fputs to do I/O (limited to lines of 256 chars or less)
- 4) using my_getline/fputs to do I/O
- 5) using freadln/fputs to do I/O
- 6) same as 4, except reimplementing my_getline to use getc
- 7) an optimized version of 6
- 8) using read/write to read and write the complete file
-
- The code for these examples is given below.
-
- I compiled each of them with "gcc -O2 -p" (gcc 2.6.3) on a
- Decstation 3100/240 running Ultrix 4.3A. I then ran each of them four
- times on each input, taking the "prof" time as the total running
- time. (The "prof" time does not reflect the actual running time,
- because the Decstations don't adjust for the clock speed, but the
- relative differences in the scores should still reflect the relative
- differences in actual running time.) The time in the table is the
- average of the middle two runs (I drop the top and bottom).
-
-                   4.5MB,        1MB,
-                   short lines   one line
-                   -----------   --------
- 1) fgetc/fputc    23.00         5.32
- 2) getc/putc      17.56         4.09
- 3) fgets/fputs    17.80         N/A
- 4) my_getline     17.52         3.96
- 5) freadln        19.70         4.20
- 6) reimplement    19.82         4.44
- 7) optimize       18.22         4.22
- 8) read/write      9.65         1.46
-
-
- Now, this actually surprised the heck out of me, so I reran the test
- for 2, 3, 4 and 5 a number of times. Even though I wrote my_getline,
- I never expected it to be as fast as the getc/putc version. Could
- someone try to verify these results? Or look at the source for the
- programs and try to optimize 2 so that it's better than 4 (I've tried
- once without success). This can't really be right, unless the Ultrix
- creators have optimized fgets and strlen enough to offset my
- implementation.
-
- Also, you may think that the code I wrote to do the actual reversal
- isn't optimized. You would be right. You would also be missing the
- point, which is to vary only the I/O code. The reversal code gives
- the program a computational piece which does a non-trivial computation
- but which doesn't overwhelm the I/O effect as far as the profiled
- timing is concerned.
-
- Jim
-
-
- #-------------------------------
- #
- # Code for version 1
- #
- #-------------------------------
-
- #include <stdio.h>
- #include <stdlib.h>
-
/*
 * Version 1: reverse the characters of every input line, doing all I/O
 * one character at a time with fgetc()/fputc().
 *
 * Reads stdin until EOF.  Each line is collected in a growable heap
 * buffer, reversed in place and written to stdout followed by '\n'.
 * A final line that is not newline-terminated is reversed and written
 * without a trailing newline.
 *
 * Returns 0 on success, 1 if memory cannot be allocated.
 */
int main(void)
{
    size_t cap = 128;           /* current capacity of line[] */
    size_t len = 0;             /* characters collected so far */
    size_t i, j;
    char *line, *grown, tch;
    int ch;                     /* int, not char: must hold every byte AND EOF */

    line = malloc(cap);
    if (line == NULL) {
        fprintf(stderr, "Out of memory.\n");
        return 1;
    }

    for (;;) {
        ch = fgetc(stdin);

        if (ch != EOF && ch != '\n') {
            /* Ordinary character: append, doubling the buffer when full. */
            if (len == cap) {
                /* Keep the old pointer until realloc is known to succeed. */
                grown = realloc(line, cap * 2);
                if (grown == NULL) {
                    free(line);
                    fprintf(stderr, "Out of memory.\n");
                    return 1;
                }
                line = grown;
                cap *= 2;
            }
            line[len++] = (char)ch;
            continue;
        }

        /* End of line or end of input: emit what was collected, reversed. */
        if (len > 0) {
            for (i = 0, j = len - 1; i < j; i++, j--) {
                tch = line[i];
                line[i] = line[j];
                line[j] = tch;
            }

            for (i = 0; i < len; i++)
                fputc(line[i], stdout);
        }

        if (ch == EOF)
            break;

        fputc('\n', stdout);
        len = 0;
    }

    free(line);
    return 0;
}
-
-
-
- #-------------------------------
- #
- # Code for version 2
- #
- #-------------------------------
-
-
- #include <stdio.h>
- #include <stdlib.h>
-
/*
 * Version 2: reverse the characters of every input line, doing all I/O
 * one character at a time with getc()/putc().
 *
 * Reads stdin until EOF.  Each line is collected in a growable heap
 * buffer, reversed in place and written to stdout followed by '\n'.
 * A final line that is not newline-terminated is reversed and written
 * without a trailing newline.
 *
 * Returns 0 on success, 1 if memory cannot be allocated.
 */
int main(void)
{
    size_t cap = 128;           /* current capacity of line[] */
    size_t len = 0;             /* characters collected so far */
    size_t i, j;
    char *line, *grown, tch;
    int ch;                     /* int, not char: must hold every byte AND EOF */

    line = malloc(cap);
    if (line == NULL) {
        fprintf(stderr, "Out of memory.\n");
        return 1;
    }

    for (;;) {
        ch = getc(stdin);

        if (ch != EOF && ch != '\n') {
            /* Ordinary character: append, doubling the buffer when full. */
            if (len == cap) {
                /* Keep the old pointer until realloc is known to succeed. */
                grown = realloc(line, cap * 2);
                if (grown == NULL) {
                    free(line);
                    fprintf(stderr, "Out of memory.\n");
                    return 1;
                }
                line = grown;
                cap *= 2;
            }
            line[len++] = (char)ch;
            continue;
        }

        /* End of line or end of input: emit what was collected, reversed. */
        if (len > 0) {
            for (i = 0, j = len - 1; i < j; i++, j--) {
                tch = line[i];
                line[i] = line[j];
                line[j] = tch;
            }

            for (i = 0; i < len; i++)
                putc(line[i], stdout);
        }

        if (ch == EOF)
            break;

        putc('\n', stdout);
        len = 0;
    }

    free(line);
    return 0;
}
-
-
-
- #-------------------------------
- #
- # Code for version 3
- #
- #-------------------------------
-
-
- #include <stdio.h>
- #include <stdlib.h>
-
/*
 * Version 3: reverse the characters of every input line using
 * fgets()/fputs() with a fixed 256-byte buffer.
 *
 * fgets() keeps the terminating '\n' in the buffer, so it is removed
 * before the reversal and written back afterwards; otherwise the
 * newline would end up at the front of the reversed text.
 *
 * Limitation: a line longer than 255 characters is delivered by fgets()
 * in several pieces, and each piece is reversed on its own.
 *
 * Note: strlen() is declared in <string.h>, which this program needs
 * to include.
 *
 * Returns 0 on success, 1 if the buffer cannot be allocated.
 */
int main(void)
{
    const int linelen = 256;
    size_t len, i, j;
    char *line, tch;
    int had_newline;

    line = malloc(linelen);
    if (line == NULL) {
        fprintf(stderr, "Out of memory.\n");
        return 1;
    }

    while (fgets(line, linelen, stdin) != NULL) {
        len = strlen(line);

        /* Detach the newline so that only the line's text is reversed. */
        had_newline = (len > 0 && line[len - 1] == '\n');
        if (had_newline)
            line[--len] = '\0';

        if (len > 0) {
            for (i = 0, j = len - 1; i < j; i++, j--) {
                tch = line[i];
                line[i] = line[j];
                line[j] = tch;
            }

            fputs(line, stdout);
        }

        /* Only terminate the output line if the input line was terminated. */
        if (had_newline)
            putc('\n', stdout);
    }

    free(line);
    return 0;
}
-
-
-
- #-------------------------------
- #
- # Code for version 4
- #
- #-------------------------------
-
-
- #include <stdio.h>
- #include <stdlib.h>
-
char *my_getline(FILE *fp, int *len_out);

/*
 * Version 4 driver: fetch lines from stdin with my_getline(), reverse
 * each one in place and write it to stdout followed by a newline.
 * Stops when my_getline() returns NULL.
 */
int main()
{
    char *text;
    int count;

    for (;;) {
        text = my_getline(stdin, &count);
        if (text == NULL)
            break;

        if (count > 0) {
            /* Swap characters pairwise, walking in from both ends. */
            char *head = text;
            char *tail = text + count - 1;
            char saved;

            while (head < tail) {
                saved = *head;
                *head = *tail;
                *tail = saved;
                head++;
                tail--;
            }

            fputs(text, stdout);
        }
        putc('\n', stdout);
    }

    return 0;
}
-
-
/*
 * my_getline
 *
 * Read a line of any length with fgets(), store it in an internal
 * (static) buffer, and return that buffer along with the line length
 * if desired.  The trailing newline, if present, is removed.
 *
 * The end of a line is detected without scanning every chunk: before
 * each fgets() call the second-to-last byte of the destination area is
 * set to '\0'.  If that byte is still '\0' (or holds '\n') afterwards,
 * fgets() did not fill the area and the line is complete; otherwise
 * the buffer is doubled and the next chunk is read directly behind the
 * previous one.
 *
 * NOTE: Each line read will overwrite the previous line read.  So,
 *       make a copy of any line you want to keep around.
 * NOTE: strlen() is declared in <string.h>, which the program must
 *       include.
 *
 * Parameters:
 *    fp       -  A FILE pointer open for reading.
 *    len_out  -  Address where to store the line length (may be NULL).
 *
 * Returns:
 *    The internal buffer containing the line, or NULL on EOF or error.
 *    If the buffer cannot be enlarged, NULL is returned and the
 *    existing buffer is kept for later calls.
 */
char *my_getline(FILE *fp, int *len_out)
{
    static int bufsize = 0;
    static char *buffer = NULL;
    int offset, len, newsize;
    char *grown;

    /*
     * Initialize the internal buffer, if necessary.
     */
    if (buffer == NULL) {
        bufsize = 128;
        if ((buffer = malloc(bufsize)) == NULL)
            return NULL;
    }

    /*
     * Read the first part of the line.
     */
    buffer[bufsize - 2] = '\0';
    if (fgets(buffer, bufsize, fp) == NULL)
        return NULL;

    /*
     * While the last chunk filled its area without reaching a newline,
     * enlarge the buffer and read the next section of the line.
     */
    offset = 0;
    len = -1;
    while (buffer[bufsize - 2] != '\0' && buffer[bufsize - 2] != '\n') {
        /* Keep the old buffer and size until realloc has succeeded. */
        newsize = bufsize + bufsize;
        if ((grown = realloc(buffer, newsize)) == NULL)
            return NULL;

        offset = bufsize - 1;       /* index of the current terminator */
        buffer = grown;
        bufsize = newsize;

        buffer[bufsize - 2] = '\0';
        if (fgets(buffer + offset, bufsize - offset, fp) == NULL) {
            /* Input ended exactly at the chunk boundary. */
            buffer[offset] = '\0';
            len = offset;
            break;
        }
    }

    /* Only the last chunk needs to be measured. */
    if (len < 0)
        len = offset + (int)strlen(buffer + offset);

    /*
     * Strip the newline from the line, if it's there.  The len > 0
     * guard matters when the line starts with a NUL byte.
     */
    if (len > 0 && buffer[len - 1] == '\n')
        buffer[--len] = '\0';

    if (len_out)
        *len_out = len;
    return buffer;
}
-
-
-
- #-------------------------------
- #
- # Code for version 5
- #
- #-------------------------------
-
-
- #include <stdio.h>
- #include <stdlib.h>
-
/* Input flags for freadln(); each flag is a distinct bit. */
typedef enum fr_inflg {
    FR_NOI  = 0,        /* no input flags */
    FR_CONV = 1 << 0,   /* convert null chars to 127 */
    FR_NL   = 1 << 1,   /* treat nulls as newlines */
    FR_KEEP = 1 << 2    /* if NOMEM, keep partial line */
} fr_inflg;

/* Status flags reported by freadln(); each flag is a distinct bit. */
typedef enum fr_outflg {
    FR_NOO   = 0,       /* no output flags */
    FR_EOF   = 1 << 0,  /* EOF was encountered */
    FR_NONL  = 1 << 1,  /* line did not end in newline */
    FR_NOMEM = 1 << 2,  /* out of memory */
    FR_OFLOW = 1 << 3   /* allocation size_t overflow */
} fr_outflg;
-
- char *freadln(const fr_inflg inf, fr_outflg *ouf, size_t *len, FILE *stream);
-
- #define CHAR_MAX 127
-
- int main()
- {
- int i, j, len, linelen;
- char ch, tch, *line;
- fr_outflg outflag;
-
- while ((line = freadln(FR_NOI, &outflag, &len, stdin)) != NULL &&
- outflag != FR_EOF) {
- if (len > 0) {
- for (i=0,j=len-1; i < j; i++,j--) {
- tch = line[i];
- line[i] = line[j];
- line[j] = tch;
- }
-
- fputs(line, stdout);
- }
- putc('\n', stdout);
-
- free(line);
- }
-
- return 0;
- }
-
-
- #define INIT_SIZE1 55 /* Fibonacci numbers */
- #define INIT_SIZE2 89
-
- char *freadln(const fr_inflg inf, fr_outflg *ouf, size_t *len, FILE *stream)
- {
- size_t oldsize = INIT_SIZE1, cursize = INIT_SIZE2, newsize;
- char *line, *pline, *limit, *new;
-
- *ouf = FR_NOO;
-
- pline = line = malloc(cursize);
-
- if (!line) {
- *ouf = FR_NOMEM;
- goto failure;
- }
-
- limit = line + cursize - 2; /* guarantee room for null */
-
- while (1) {
- int c = getc(stream);
-
- switch(c) {
- case '\0':
- if (inf & FR_NL) /* nulls terminate */
- goto badend;
- if (inf & FR_CONV) { /* nulls get replaced */
- c = CHAR_MAX;
- goto addchar;
- }
- goto addchar;
- break;
- case EOF:
- *ouf = FR_EOF;
- if (pline == line) /* first character? */
- goto success;
- badend: /* line end w/o newline */
- *ouf |= FR_NONL;
- case '\n':
- goto success; /* jump out of loop */
- addchar:
- default:
- *pline++ = c;
- break;
- }
-
- if (pline >= limit) { /* if buffer is full... */
- newsize = cursize + oldsize;
- if (newsize < cursize) { /* overflow! */
- *ouf = FR_OFLOW;
- if (inf & FR_KEEP)
- goto success;
- free(line);
- goto failure;
- }
- new = realloc(line, newsize);
- if (!new) {
- *ouf = FR_NOMEM;
- if (inf & FR_KEEP)
- goto success;
- free(line);
- goto failure;
- }
- oldsize = cursize;
- cursize = newsize;
- pline = new + (pline - line);
- line = new;
- limit = line + cursize - 2;
- }
- }
-
- success:
- *pline++ = '\0'; /* null-terminate */
- *len = pline - line - 1; /* calculate line length */
- new = realloc(line, *len + 1); /* try to trim buffer down */
- return (new) ? new : line; /* if cannot, ah well... */
-
- failure:
- return NULL;
- }
-
-
-
- #-------------------------------
- #
- # Code for version 7
- #
- #-------------------------------
-
- #include <stdio.h>
- #include <stdlib.h>
-
char *my_getline(FILE *fp, int *len_out);

/*
 * Version 7 driver: fetch lines from stdin with my_getline(), reverse
 * each one in place and write it to stdout followed by a newline.
 * Stops when my_getline() returns NULL.
 */
int main()
{
    char *text;
    int count;

    for (;;) {
        text = my_getline(stdin, &count);
        if (text == NULL)
            break;

        if (count > 0) {
            /* Swap characters pairwise, walking in from both ends. */
            char *head = text;
            char *tail = text + count - 1;
            char saved;

            while (head < tail) {
                saved = *head;
                *head = *tail;
                *tail = saved;
                head++;
                tail--;
            }

            fputs(text, stdout);
        }
        putc('\n', stdout);
    }

    return 0;
}
-
-
/*
 * my_getline
 *
 * Read a line of any length one character at a time with getc(),
 * store it in an internal (static) buffer, and return that buffer
 * along with the line length if desired.  The newline is consumed but
 * not stored.
 *
 * NOTE: Each line read will overwrite the previous line read.  So,
 *       make a copy of any line you want to keep around.
 *
 * Parameters:
 *    fp       -  A FILE pointer open for reading.
 *    len_out  -  Address where to store the line length (may be NULL).
 *
 * Returns:
 *    The internal buffer containing the line, or NULL on EOF or error.
 *    If the buffer cannot be enlarged, NULL is returned and the
 *    existing buffer is kept for later calls.
 */
char *my_getline(FILE *fp, int *len_out)
{
    static int bufsize = 0;
    static char *buffer = NULL;
    int len = 0;
    int ch;                     /* int, not char: must hold every byte AND EOF */
    char *grown;

    /*
     * Initialize the internal buffer, if necessary.
     */
    if (buffer == NULL) {
        bufsize = 128;
        if ((buffer = malloc(bufsize)) == NULL)
            return NULL;
    }

    /*
     * The main loop reading characters from fp.  The buffer is doubled
     * as soon as it is full, so there is always room for one more
     * character or for the terminating null.
     */
    while ((ch = getc(fp)) != EOF && ch != '\n') {
        buffer[len++] = (char)ch;

        if (len == bufsize) {
            /* Keep the old buffer and size until realloc has succeeded. */
            if ((grown = realloc(buffer, bufsize + bufsize)) == NULL)
                return NULL;
            buffer = grown;
            bufsize += bufsize;
        }
    }

    /* EOF before any character of a new line: nothing left to return. */
    if (ch == EOF && len == 0)
        return NULL;

    buffer[len] = '\0';
    if (len_out)
        *len_out = len;
    return buffer;
}
-
-
-
- #-------------------------------
- #
- # Code for version 8
- #
- #-------------------------------
-
- #include <stdio.h>
- #include <fcntl.h>
- #include <stdlib.h>
- #include <unistd.h>
- #include <sys/stat.h>
-
/*
 * Version 8: read the whole input file into memory with read(), reverse
 * the characters of every line in place, and write the result to the
 * output file with write().
 *
 * Usage: prog infile outfile
 *
 * Lines are delimited by '\n'; the newlines stay where they are.  The
 * scan is bounded by the file size rather than by a terminating null,
 * so null bytes in the input are treated as ordinary characters and a
 * missing final newline is handled.  An empty input file produces an
 * empty output file.  The output file is truncated if it already
 * exists.
 *
 * Exits with status 1 on a usage error or on any I/O or allocation
 * failure; returns 0 on success.
 */
int main(int argc, char *argv[])
{
    struct stat sbuf;
    char *buffer, *end, *start, *stop, *lo, *hi, tch;
    size_t size, done;
    ssize_t n;
    int fd;

    if (argc != 3) {
        fprintf(stderr, "Usage: prog infile outfile\n");
        exit(1);
    }

    /*
     * Read the complete input file.
     */
    if ((fd = open(argv[1], O_RDONLY)) < 0 ||
        fstat(fd, &sbuf) == -1 ||
        sbuf.st_size < 0 ||
        (buffer = malloc((size_t)sbuf.st_size + 1)) == NULL) {
        fprintf(stderr, "Error during reading of input file.\n");
        exit(1);
    }
    size = (size_t)sbuf.st_size;

    /* read() may return fewer bytes than requested, so loop. */
    for (done = 0; done < size; done += (size_t)n) {
        n = read(fd, buffer + done, size - done);
        if (n <= 0) {
            fprintf(stderr, "Error during reading of input file.\n");
            exit(1);
        }
    }
    close(fd);

    /*
     * Reverse each line in place.
     */
    end = buffer + size;
    start = buffer;
    while (start < end) {
        /* Find the end of the current line: a newline or end of data. */
        for (stop = start; stop < end && *stop != '\n'; stop++)
            ;

        /* Lines of fewer than two characters are already reversed. */
        if (stop - start >= 2) {
            for (lo = start, hi = stop - 1; lo < hi; lo++, hi--) {
                tch = *lo;
                *lo = *hi;
                *hi = tch;
            }
        }

        if (stop == end)
            break;
        start = stop + 1;               /* step over the newline */
    }

    /*
     * Write the complete output file.
     */
    if ((fd = open(argv[2], O_WRONLY | O_CREAT | O_TRUNC, 0666)) < 0) {
        fprintf(stderr, "Error during writing of output file.\n");
        exit(1);
    }

    /* write() may accept fewer bytes than requested, so loop. */
    for (done = 0; done < size; done += (size_t)n) {
        n = write(fd, buffer + done, size - done);
        if (n <= 0) {
            fprintf(stderr, "Error during writing of output file.\n");
            exit(1);
        }
    }

    if (close(fd) < 0) {
        fprintf(stderr, "Error during writing of output file.\n");
        exit(1);
    }

    free(buffer);
    return 0;
}
-